###################
## Nested Pseudo likelihood Estimation Heterogeneous ***WITH*** fe at parish level
## We include father and siblings information from 1851 and 1881 censuses
## To be run after Mlogit_NAPPHeterofformula1v2.R
## Where we guarantee that KS probabilities are within [0,1]
## We guarantee inner loop (fixed point expectations) convergence using tol 10e-10 for the sup-norm stopping criterion
## We follow KS 2012 and KS 2018 using outer loope iterations k=50 and guaranteeing inner loop convergence (see above)
###################

rm(list=ls(all=TRUE))
set.seed(12345)
#root <- "S:/NetworkParish/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
#root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))
#codes <- "C:/HPC/CODE/"
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu
if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
sink(paste(rootsave,"NAPPEstimationHetewfenewFatherFamily.txt")) 
options(max.print=1000000)
gc()
#######
#Load Data

load(file=paste(root,"NAPP/ReStat/pooleddataNAPPneww50list.RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomoneww50list.RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPneww50list.RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomoneww50list.RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/datalistNAPPneww50list.RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
load(file=paste(root,"NAPP/ReStat/coordslistNAPPneww50list.RData",sep="" )) #coordslist, Lists of coordinates per parish
load(file=paste(root,"NAPP/ReStat/dneighboursNAPPneww50list.RData",sep="")) #dneighbours, Lists of neigbours identities per parish
load(file=paste(root,"NAPP/ReStat/wlistNAPPneww50list.RData",sep="")) #wlist, Sparse Lists of neighbours per parish

####################
##Family data sets

##father
load(file=paste(codes,"father.RData",sep=""))
fatherdata <- matched
fatherdata <- fatherdata[!is.na(fatherdata$HHNUMhead),] ##eliminated null info inds
fatherdata <- fatherdata[!(is.na(fatherdata$father_cat) | fatherdata$father_cat==""),]
HHfather <- unique(fatherdata$HHNUMhead)
fatherdata[fatherdata$father_cat=="professional",]$father_cat <- 0
fatherdata[fatherdata$father_cat=="commercial",]$father_cat <- 1
fatherdata[fatherdata$father_cat=="artisan",]$father_cat <- 2
fatherdata[fatherdata$father_cat=="builder",]$father_cat <- 3
fatherdata[fatherdata$father_cat=="food",]$father_cat <- 4
fatherdata[fatherdata$father_cat=="service",]$father_cat <- 5
fatherdata[fatherdata$father_cat=="domestic",]$father_cat <- 6
#fclass <- factor(fatherdata$father_cat_all) #defining factors based on class 
#class.f <- model.matrix(~ -1 + fclass) #creating columnsdummy vars for each class category
#fatherdata <- cbind(fatherdata,class.f)

##Siblings
load(file=paste(codes,"sibling.RData",sep=""))
siblingdata <- matched
siblingdata <- siblingdata[!is.na(siblingdata$HHNUMhead),] ##eliminated null info inds
siblingdata <- siblingdata[!(is.na(siblingdata$family_id) | siblingdata$family_id==""),] 
nsibling <- siblingdata %>% count(family_id)
siblingdata <- siblingdata[siblingdata$family_id %in% nsibling[nsibling$n>1,]$family_id,] #keeping those families with at least one sibling (and down below those with less than 50 siblings & siblingdata$family_id %in% nsibling[nsibling$n<51,]$family_id), 
HHsibling <- unique(siblingdata$HHNUMhead)

#############
#Definitions

##define which variables we include in the estimation and which formula we use
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "SEX", "stayerc"
wvar <- "sw"
alter <- sort(unique(pooleddata$alt))
#IDCivil
formula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))
formulafather <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"+","factor(father_cat)","|", wvar ,sep="")))
formulasibling <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"+",paste("shsib",setdiff(alter,min(alter)),collapse= "+",sep=""),"|", wvar ,sep="")))
load(file=paste(rootsave,"PMLmwfinalnew.RData",sep=""))
pooleddatamodelwo <- ml.m.w$model

#IDSocial
fformula1 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))
fformulafather <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"+","factor(father_cat)","|", wvar ,sep="")))
fformulasibling <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"+",paste("shsib",setdiff(alter,min(alter)),collapse= "+",sep=""),"|", wvar ,sep="")))
load(file=paste(rootsave,"PMLmtwfinalnew.RData",sep=""))
pooleddatamodel <- ml.m.w$model

#to include HHNUMhead in pooleddatamodel
pooleddatamodel$HHNUMhead <- NA
pooleddatamodelwo$HHNUMhead <- NA
for(aa in alter) {
  pooleddatamodel[pooleddatamodel$alt==aa,]$HHNUMhead <- pooleddata[pooleddata$alt==aa,]$ID
  pooleddatamodelwo[pooleddatamodelwo$alt==aa,]$HHNUMhead <- pooleddata[pooleddata$alt==aa,]$ID
}

#################
## Estimation exercises

##Father exercise

#wo parish fe
pooleddatamodelfatherwo <- pooleddatamodelwo[pooleddatamodelwo$HHNUMhead %in% HHfather,]
pooleddatafatherwo <- join(pooleddatamodelfatherwo,fatherdata,by='HHNUMhead',type='left')
alterfather <- sort(unique(pooleddatafatherwo$father_cat))

print("Benchmark model with father occupation dummies, wo IDSocial")
ml.m.w <- mnlogit(formulafather, pooleddatafatherwo, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmwfinalnewFatherocc.RData",sep=""))

print("Benchmark model with father sample NO occupation dummies, wo IDSocial")
ml.m.w <- mnlogit(formula1, pooleddatafatherwo, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmwfinalnewFather.RData",sep=""))


# with parish fe
pooleddatamodelfather <- pooleddatamodel[pooleddatamodel$HHNUMhead %in% HHfather,]
pooleddatafather <- join(pooleddatamodelfather,fatherdata,by='HHNUMhead',type='left')
alterfather <- sort(unique(pooleddatafather$father_cat))
print("Benchmark model with father occupation dummies")
ml.m.w <- mnlogit(fformulafather, pooleddatafather, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnewFatherocc.RData",sep=""))

print("Benchmark model with father sample NO occupation dummies")
ml.m.w <- mnlogit(fformula1, pooleddatafather, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnewFather.RData",sep=""))

##Siblings exercise
#wo parish fe
pooleddatamodelsiblingwo <- pooleddatamodelwo[pooleddatamodelwo$HHNUMhead %in% HHsibling,]
pooleddatasiblingwo <- join(pooleddatamodelsiblingwo,siblingdata,by='HHNUMhead',type='left')
nsibling <- pooleddatasiblingwo %>% count(family_id)
pooleddatasiblingwo <- pooleddatasiblingwo[pooleddatasiblingwo$family_id %in% nsibling[nsibling$n>6,]$family_id & pooleddatasiblingwo$family_id %in% nsibling[nsibling$n<306,]$family_id,] #keeping those families with at least one sibling (6 obs) (and down below those with less than 50 (306obs) siblings ,]$family_id), 
pooleddatasiblingwo$choicealt <- pooleddatasiblingwo$choice*(pooleddatasiblingwo$alt+1)
pooleddatasiblingwo$order <- c(1:nrow(pooleddatasiblingwo))
for(aa in setdiff(alter,min(alter)))  {
  pooleddatasiblingwo[,"shsib"] <- 0
  pooleddatasiblingwo[pooleddatasiblingwo$alt==aa,"shsib"] <- (pooleddatasiblingwo[pooleddatasiblingwo$alt==aa,]$choicealt==(aa+1)) 
  prov <- ddply(pooleddatasiblingwo, .(HHNUMhead), transform, shsib = max(shsib))
  prov <- ddply(prov, .(family_id), transform, shsib = ((sum(shsib)/length(alter))-shsib)/((length(family_id)/length(alter))-1))
  pooleddatasiblingwo[,paste("shsib",aa,sep="")] <- prov[order(prov$order),"shsib"]
}
pooleddatasiblingwo <- pooleddatasiblingwo[,!(names(pooleddatasiblingwo) %in% c("order","choicealt","shsib"))]

print("Benchmark model with sibling sample, wo IDSocial")
ml.m.w <- mnlogit(formula1, pooleddatasiblingwo, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmwfinalnewSibling.RData",sep=""))

print("Benchmark model with sibling sample, wo IDSocial, with sibling occ")
ml.m.w <- mnlogit(formulasibling, pooleddatasiblingwo, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmwfinalnewSiblingocc.RData",sep=""))


# with parish fe
pooleddatamodelsibling <- pooleddatamodel[pooleddatamodel$HHNUMhead %in% HHsibling,]
pooleddatasibling <- join(pooleddatamodelsibling,siblingdata,by='HHNUMhead',type='left')
nsibling <- pooleddatasibling %>% count(family_id)
pooleddatasibling <- pooleddatasibling[pooleddatasibling$family_id %in% nsibling[nsibling$n>6,]$family_id & pooleddatasibling$family_id %in% nsibling[nsibling$n<306,]$family_id,] #keeping those families with at least one sibling (and down below those with less than 50 siblings ,]$family_id), 
pooleddatasibling$choicealt <- pooleddatasibling$choice*(pooleddatasibling$alt+1)
pooleddatasibling$order <- c(1:nrow(pooleddatasibling))
for(aa in setdiff(alter,min(alter)))  {
  pooleddatasibling[,"shsib"] <- 0
  pooleddatasibling[pooleddatasiblingwo$alt==aa,"shsib"] <- (pooleddatasibling[pooleddatasibling$alt==aa,]$choicealt==(aa+1)) 
  prov <- ddply(pooleddatasibling, .(HHNUMhead), transform, shsib = max(shsib))
  prov <- ddply(prov, .(family_id), transform, shsib = ((sum(shsib)/length(alter))-shsib)/((length(family_id)/length(alter))-1))
  pooleddatasibling[,paste("shsib",aa,sep="")] <- prov[order(prov$order),"shsib"]
}
pooleddatasibling <- pooleddatasibling[,!(names(pooleddatasibling) %in% c("order","choicealt","shsib"))]


print("Benchmark model with sibling sample, w IDOSocial, wo sibling occ")
ml.m.w <- mnlogit(fformula1, pooleddatasibling, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnewSibling.RData",sep=""))

print("Benchmark model with sibling sample, w IDOSocial, w sibling occ")
ml.m.w <- mnlogit(fformulasibling, pooleddatasibling, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnewSiblingocc.RData",sep=""))

sink()
